/* ========================================================================== */
/* =========================== PEER FX - ANALYSIS =========================== */
/* ========================================================================== */

*** Initial information and settings

clear all
macro drop _all
program drop _all
eststo clear
version 14.2
set matsize 10000


* Set directory: set the main project root directory here,
* /Data/ folder is a first-level subdirectory
global maindir "/home/someuser/somefolder/mainprojectdir"
* Any directory to direct output to
global outputdir "/home/someuser/somefolder/output"

* Load the data
use "$maindir/Data/peerfxdata.dta" , clear

* Generate control vectors
global control   "female native i.age"
global clcontrol "clmeanfemale clmeannative clmeanage"
global slcontrol "slmeanfemale slmeannative slmeanage"


*** Summary Table

* Outcome
sum std_composite std_pptmathematik std_pptdeutsch composite

* Treatment
sum snchild snchildren shsnchildren intsnchild disruptive disrchildren learnproblem learnchildren

* Controls
sum female native age clsizefix ageatfirstreg
distinct classfix
distinct schoolxtrack
distinct lehrerid

* Mid-term outcomes
preserve
keep if swjahr<2016
keep if postcompulsory!=.
qui tab educhoice, gen(d_educhoice)
sum d_*
sum postcompulsory vetvsaca vetquality
restore

* Long-term outcomes
preserve
keep if avg_employed!=.
keep if highest_employed!=.
keep if bfsdduree2016!=.
egen qtincome1 = xtile(avg_monthly)     , nq(100)
egen qtincome2 = xtile(highest_monthly) , nq(100)
egen qtincome3 = xtile(monthlyinc2016)  , nq(100)
drop if qtincome1==1
drop if qtincome2==1
drop if qtincome3==1
drop if qtincome1==100
drop if qtincome2==100
drop if qtincome3==100
sum avg_employed highest_employed bfsdduree2016 avg_monthly highest_monthly monthlyinc2016
restore

* Classroom level covariates
sum $clcontrol

* Cohort-level covariates
sum $slcontrol



******************** ATTRITION GRAPH ********************


*** Generate attrition indicator

* In the education data
preserve
gen birthyear = year(geburtsdatum)
gen attrition1 = 0
replace attrition1 = 1 if postcompulsory==.
replace attrition1 = 1 if postcompulsory==0

* In the social security data
gen attrition2 = 0
replace attrition2 = 1 if avg_employed==.
replace attrition2 = 1 if highest_employed==.
replace attrition2 = 1 if bfsdduree2016==.
tab attrition1
tab attrition2

bys swjahr: egen avg_attr1 = mean(attrition1)
bys swjahr: egen avg_attr2 = mean(attrition2)
twoway  ///
    (connected avg_attr1 swjahr , color(black) msymbol(diamond) ) ///
    (connected avg_attr2 swjahr , color(gray ) msymbol(circle ) ) ///
    , ///
    graphregion(color(white)) bgcolor(white)  ///
    xtitle("School cohort", height(4)) ///
    ytitle("Pr(attrition)", height(5)) ///
    legend(order(1 "Post-compulsory education data" 2 "Labor market data") symxsize(5) cols(2) region(color(white))) ///
    ylabel(, nogrid labsize(*0.9)) xlabel(2008(1)2017, nogrid labsize(*0.9))
drop avg_attr*
graph export "$outputdir/figures/attrition.pdf" , replace
restore



******************** GRAPHICAL EVIDENCE ********************


*** Distribution of test scores

hist std_composite , fcolor(gs12) lcolor(gs8) ///
    xtitle("Standardized test score", size(*0.9)) ytitle("Density", size(*0.9)) ///
    graphregion(color(white)) bgcolor(white) ///
    xlabel(, labsize(*0.8) nogrid) ylabel(, labsize(*0.8) nogrid) ///
    addplot( ///
    	kdensity std_composite if snchild == 0, ///
            graphregion(color(white)) bgcolor(white) ylab(,nogrid) color(black) lp(solid) lw(*0.7) ///
            || ///
    	kdensity std_composite if snchild == 1, ///
            graphregion(color(white)) bgcolor(white) ylab(,nogrid) color(black) lp(dash) lw(*0.7) ///
            ) ///
	legend(order(1 "Empirical density, all students" 2 "Kernel density estimate, non-SN students" ///
		3 "Kernel density estimate, SN students") col(1) symxsize(5) region(color(white)))
graph export "$outputdir/figures/testscore.pdf" , replace


*** Test scores by share of SN peers

twoway (lpoly std_composite shsnchildren if (snchild==0&shsnchildren<=0.8), lcolor(black) lpattern(solid)) ///
	   (lpoly std_composite shsnchildren if (snchild==1&shsnchildren<=0.8), lcolor(black) lpattern(dash)) ///
	, ///
    yline(0, lpattern(dot) lcolor(black)) ///
    xtitle("Proportion of SN peers", size(*0.9)) ytitle("Standardized test score", size(*0.9)) ///
    graphregion(color(white)) bgcolor(white) ///
    xlabel(, labsize(*0.8) nogrid) ylabel(, labsize(*0.8) nogrid) ///
    legend(order(1 "Non-SN students" 2 "SN students") symxsize(8) region(color(white)))
graph export "$outputdir/figures/scorebyshare.pdf" , replace


*** Class sizes, SN per class, SN Shares

hist clsizefix , disc percent fcolor(gs12) lcolor(gs8) ///
    xtitle("Class size") ytitle("Percent") ///
    graphregion(color(white)) bgcolor(white) title("A. Distribution of class size", color(black) size(*0.8)) ///
    xlabel(, labsize(*0.9) nogrid) ylabel(0(5)15, labsize(*0.9) nogrid) name(csize, replace)
hist snchildren , bin(15) percent fcolor(gs12) lcolor(gs8) ///
    xtitle("Number of SN peers") ytitle("Percent") ///
    graphregion(color(white)) bgcolor(white) title("B. Distribution SN peers (in #)", color(black) size(*0.8)) ///
    xlabel(, labsize(*0.9) nogrid) ylabel(0(5)15, labsize(*0.9) nogrid) name(numbers, replace)
hist shsnchildren , bin(15) percent fcolor(gs12) lcolor(gs8) ///
    xtitle("Proportion of SN peers") ytitle("Percent") ///
    graphregion(color(white)) bgcolor(white) title("C. Distribution SN peers (in %)", color(black) size(*0.8)) ///
    xlabel(, labsize(*0.9) nogrid) ylabel(0(5)15, labsize(*0.9) nogrid) name(shares, replace)
graph combine csize numbers shares , col(1) graphregion(color(white)) ysize(9) ycommon
graph export "$outputdir/figures/distributions.pdf" , replace


*** Distribution of SN peers with and without school-track-year FE

qui reg shsnchildren
predict share_nofe , res 
qui areg shsnchildren , abs(schoolxtrack)
predict share_stfe , res 
qui areg shsnchildren , abs(schoolxtrackxyear)
predict share_styfe , res 
sum shsnchildren share_nofe share_stfe share_styfe
kdensity share_nofe , kernel(gau) bwidth(0.025) ///
         addplot((kdensity share_stfe , kernel(gau) bwidth(0.025)) (kdensity share_styfe , kernel(gau) bwidth(0.025)) ) ///
         graphregion(color(white)) bgcolor(white) note("") xline(0, lcolor(black) lpattern(dot)) ///
         xtitle("Proportion of SN peers", size(*0.9)) ytitle("Density", size(*0.9)) ///
         xlabel(, labsize(*0.8) nogrid) ylabel(, labsize(*0.8) nogrid) title("")  ///
         legend(order(1 "No FE (unconditional, mean-centered)" 2 "With school-track FE" 3 "With school-track-year FE") ///
         col(1) symxsize(7) region(color(white)))
graph export "$outputdir/figures/sharesFE.pdf" , replace
drop share_nofe share_stfe share_styfe



******************** BALANCING TESTS ********************


*** Classroom: Does the share of SN classmates predict individual characteristics?

* Gender
areg female shsnchildren snchild clsizefix                                     , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg female shsnchildren snchild clsizefix native i.age                        , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg female shsnchildren snchild clsizefix native i.age clmeannative clmeanage , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Native
areg native shsnchildren snchild clsizefix                                     , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg native shsnchildren snchild clsizefix female i.age                        , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg native shsnchildren snchild clsizefix female i.age clmeanfemale clmeanage , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Age (old at test)
areg oldattest shsnchildren snchild clsizefix                                         , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg oldattest shsnchildren snchild clsizefix female native                           , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
areg oldattest shsnchildren snchild clsizefix female native clmeanfemale clmeannative , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Joint significance
areg shsnchildren snchild female native oldattest clsizefix , abs(schoolxtrackxyear) cl(schoolxtrackxyear)
test native female oldattest


*** Classroom: Test for non-SN

* Gender
areg female shsnchildren snchild clsizefix native i.age clmeannative clmeanage if snchild==0 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Native
areg native shsnchildren snchild clsizefix female i.age clmeanfemale clmeanage if snchild==0 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Age (old at test)
areg oldattest shsnchildren snchild clsizefix female native clmeanfemale clmeannative if snchild==0 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)


*** Classroom: Test for SN

* Gender
areg female shsnchildren snchild clsizefix native i.age clmeannative clmeanage if snchild==1 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Native
areg native shsnchildren snchild clsizefix female i.age clmeanfemale clmeanage if snchild==1 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)

* Age (old at test)
areg oldattest shsnchildren snchild clsizefix female native clmeanfemale clmeannative if snchild==1 , abs(schoolxtrackxyear) cl(schoolxtrackxyear)


*** Cohort: Does the share of SN classmates predict individual characteristics?

* Gender
reghdfe female    sshare_snchild snchild cohortsize                                     , abs(swjahr schoolxtrack) cl(cohort)
reghdfe female    sshare_snchild snchild cohortsize native i.age                        , abs(swjahr schoolxtrack) cl(cohort)
reghdfe female    sshare_snchild snchild cohortsize native i.age slmeannative slmeanage , abs(swjahr schoolxtrack) cl(cohort)

* Native
reghdfe native    sshare_snchild snchild cohortsize                                     , abs(swjahr schoolxtrack) cl(cohort)
reghdfe native    sshare_snchild snchild cohortsize female i.age                        , abs(swjahr schoolxtrack) cl(cohort)
reghdfe native    sshare_snchild snchild cohortsize female i.age slmeanfemale slmeanage , abs(swjahr schoolxtrack) cl(cohort)

* Age (old at test)
reghdfe oldattest sshare_snchild snchild cohortsize                                         , abs(swjahr schoolxtrack) cl(cohort)
reghdfe oldattest sshare_snchild snchild cohortsize female native                           , abs(swjahr schoolxtrack) cl(cohort)
reghdfe oldattest sshare_snchild snchild cohortsize female native slmeanfemale slmeannative , abs(swjahr schoolxtrack) cl(cohort)

* Joint significance
reghdfe sshare_snchild snchild female native oldattest cohortsize , abs(swjahr schoolxtrack) cl(cohort)
test native female oldattest


*** Cohort: Test for non-SN

* Gender
reghdfe female    sshare_snchild snchild cohortsize native i.age slmeannative slmeanage if snchild==0 , abs(swjahr schoolxtrack) cl(cohort)

* Native
reghdfe native    sshare_snchild snchild cohortsize female i.age slmeanfemale slmeanage if snchild==0 , abs(swjahr schoolxtrack) cl(cohort)

* Age (old at test)
reghdfe oldattest sshare_snchild snchild cohortsize female native slmeanfemale slmeannative if snchild==0 , abs(swjahr schoolxtrack) cl(cohort)


*** Cohort: Test for SN

* Gender
reghdfe female    sshare_snchild snchild cohortsize native i.age slmeannative slmeanage if snchild==1 , abs(swjahr schoolxtrack) cl(cohort)

* Native
reghdfe native    sshare_snchild snchild cohortsize female i.age slmeanfemale slmeanage if snchild==1 , abs(swjahr schoolxtrack) cl(cohort)

* Age (old at test)
reghdfe oldattest sshare_snchild snchild cohortsize female native slmeanfemale slmeannative if snchild==1 , abs(swjahr schoolxtrack) cl(cohort)


*** Do class IDs predict SN status? Do teacher IDs predict SN status?

qui areg snchild  , abs(schoolxtrackxyear)
predict resid , resid
reghdfe resid , abs(classfix)
reghdfe resid , abs(lehrerid)
drop resid 



******************** MAIN TABLE ********************


*** Panel (a): All children, Level: within school-track x year

areg std_composite shsnchildren snchild clsizefix                     , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren snchild clsizefix $control            , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)


*** Panel (b): Non-SN children, Level: within school-track x year

areg std_composite shsnchildren clsizefix                     if snchild == 0 , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren clsizefix $control            if snchild == 0 , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren clsizefix $control $clcontrol if snchild == 0 , abs(schoolxtrackxyear) cl(classfix)


*** Panel (c): SN children, Level: within school-track x year

areg std_composite shsnchildren clsizefix                     if snchild == 1 , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren clsizefix $control            if snchild == 1 , abs(schoolxtrackxyear) cl(classfix)
areg std_composite shsnchildren clsizefix $control $clcontrol if snchild == 1 , abs(schoolxtrackxyear) cl(classfix)



******************** SN INTENSITY: NUMBER OF CONTACTS, TOP SN, INTERVALS ********************


*** Panel (a): All children, Level: within school-track x year
egen intsnchildren_std = std(intsnchildren)
areg std_composite intsnchildren     shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
areg std_composite intsnchildren_std shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** Panel (b): All children, Level: within school-track x year
areg std_composite shtopsn           snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** Panel (c): All children, Level: within school-track x year
areg std_composite shintsnchildren_* snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** Alternative measures for intensity: contacts per year 
preserve
gen timeatsps = (pddatumabschlussfirst - pddatumanmeldungfirst)/(365.25/12) if snchild==1
replace timeatsps = 0 if snchild==0
replace timeatsps = . if (timeatsps<0  & snchild==1)
gen contactxmo = (intsnchild/timeatsps)
replace contactxmo = 0 if snchild==0
drop if contactxmo==.
bysort classfix: egen intsnpermo = total(contactxmo)
egen intsnpermo_std = std(intsnpermo)
areg std_composite intsnpermo     shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
areg std_composite intsnpermo_std shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
sum intsnpermo
restore
sum intsnchildren



******************** SN TYPE: DISRUPTIVE, LEARNING PROBLEM (NON-DISRUPTIVE) ********************


*** Panel (a): All children, Level: within school-track x year, Type learning problem (non-disruptive)
areg std_composite shlearn      learnproblem clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** Panel (b): All children, Level: within school-track x year, Type: disruptive
areg std_composite shdisruptive disruptive clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** Panel (c): All children, Level: within school-track x year, Type: disruptive and learning problem
areg std_composite shlearn learnproblem shdisruptive disruptive clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
test learnproblem = disruptive
test shlearn = shdisruptive



******************** QUANTILE REGRESSIONS ********************


*** All children, Level: within school-track x year

capture drop b_* cl_* cu_* qt_*
forvalues qt = 10(5)90 {
	xi: qui xtrifreg std_composite shsnchildren snchild clsizefix $control $clcontrol , fe i(schoolxtrackxyear) q(`qt')
    gen b_`qt'  = _b[shsnchildren]
    gen cl_`qt' = _b[shsnchildren] - 1.96 * _se[shsnchildren]
    gen cu_`qt' = _b[shsnchildren] + 1.96 * _se[shsnchildren]
    gen qt_`qt' = `qt'
}
twoway ///
    (scatter b_10 qt_10 , msize(*0.7) mcolor(black)) (rspike cl_10 cu_10 qt_10 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_15 qt_15 , msize(*0.7) mcolor(black)) (rspike cl_15 cu_15 qt_15 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_20 qt_20 , msize(*0.7) mcolor(black)) (rspike cl_20 cu_20 qt_20 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_25 qt_25 , msize(*0.7) mcolor(black)) (rspike cl_25 cu_25 qt_25 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_30 qt_30 , msize(*0.7) mcolor(black)) (rspike cl_30 cu_30 qt_30 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_35 qt_35 , msize(*0.7) mcolor(black)) (rspike cl_35 cu_35 qt_35 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_40 qt_40 , msize(*0.7) mcolor(black)) (rspike cl_40 cu_40 qt_40 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_45 qt_45 , msize(*0.7) mcolor(black)) (rspike cl_45 cu_45 qt_45 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_50 qt_50 , msize(*0.7) mcolor(black)) (rspike cl_50 cu_50 qt_50 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_55 qt_55 , msize(*0.7) mcolor(black)) (rspike cl_55 cu_55 qt_55 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_60 qt_60 , msize(*0.7) mcolor(black)) (rspike cl_60 cu_60 qt_60 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_65 qt_65 , msize(*0.7) mcolor(black)) (rspike cl_65 cu_65 qt_65 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_70 qt_70 , msize(*0.7) mcolor(black)) (rspike cl_70 cu_70 qt_70 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_75 qt_75 , msize(*0.7) mcolor(black)) (rspike cl_75 cu_75 qt_75 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_80 qt_80 , msize(*0.7) mcolor(black)) (rspike cl_80 cu_80 qt_80 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_85 qt_85 , msize(*0.7) mcolor(black)) (rspike cl_85 cu_85 qt_85 , lcolor(black) lwidth(*0.5)) ///
	(scatter b_90 qt_90 , msize(*0.7) mcolor(black)) (rspike cl_90 cu_90 qt_90 , lcolor(black) lwidth(*0.5)) ///
    in 1 , ///
    yline(0, lpattern(dot) lcolor(black)) ///
    xtitle("Test score percentile", size(*0.9)) ytitle("Effect size", size(*0.9)) ///
    graphregion(color(white)) bgcolor(white) ///
    xlabel(10(10)90, labsize(*0.8) nogrid) ylabel(-1.5(0.5)0.5, labsize(*0.8) nogrid) ///
    legend(order(1 "Estimate" 2 "95% Confidence interval") symxsize(5) region(color(white)))
graph export "$outputdir/figures/qte.pdf" , replace
graph export "$outputdir/figures/qte.eps" , as(eps) preview(off) replace



******************** NONLINEAR EFFECTS (SPLINES) ********************


*** All children, Splines on the number of SN children

preserve

mkspline2 rtcs = shsnchildren , cubic nknots(3) displayknots

qui areg std_composite rtcs* snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
capture drop exp cilo ciup
adjustrcspline , generate(exp cilo ciup)   link(identity) level(95) ciopts(color(gs14)) lineopts(lcolor(black)) ///
	yline(0, lw(*0.6) lp(dot) lc(black)) ///
    graphregion(color(white)) bgcolor(white)  ///
    ylabel(-0.6(0.2)0.2, labsize(*0.9) nogrid) ytitle("Predicted test score") ///
    xlabel(0(0.2)1, labsize(*0.9) nogrid) xtitle("Proportion of SN peers") ///
	title("A. Predicted test scores (with 95% confidence intervals)", color(black) size(*0.8)) ///
	name(outcome, replace)

capture drop mfx mcilo mciup
mfxrcspline , generate(mfx mcilo mciup) link(identity) level(95) ciopts(color(gs14)) lineopts(lcolor(black)) ///
	yline(0, lw(*0.6) lp(dot) lc(black)) ///
    graphregion(color(white)) bgcolor(white)  ///
    ylabel(-1.5(0.5)0.5, labsize(*0.9) nogrid) ytitle("Marginal effect of SN peers") ///
    xlabel(0(0.2)1, labsize(*0.9) nogrid) xtitle("Proportion of SN peers") ///
	title("B. Non-linear effect of SN (with 95% confidence intervals)", color(black) size(*0.8)) ///
	name(effect, replace)

graph combine outcome effect , col(1) graphregion(color(white)) xcommon ysize(7.5)
graph export "$outputdir/figures/splines.pdf" , replace
graph export "$outputdir/figures/splines.eps" , as(eps) preview(off) replace

restore



******************** ROBUSTNESS CHECKS I : OUTCOME AND TREATMENT ********************


*** INTERACTION TERM
areg std_composite c.shsnchildren##i.snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** BY SUBJECT
areg std_pptmathematik shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
areg std_pptdeutsch    shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** NUMBER OF SN PEERS INSTEAD OF SHARE
areg std_composite snchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** ALTERNATIVE PEER MEASURE: AT LEAST 1 SN PEER
areg std_composite atleastonesn snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)

*** COHORT LEVEL ANALYSIS
reghdfe std_composite sshare_snchild snchild cohortsize $control $slcontrol , abs(swjahr schoolxtrack) cl(cohort)



******************** ROBUSTNESS CHECKS II : FE AND RESTRICTIONS ********************


*** ALTERNATIVE SPECIFICATION OF THE FEs
reghdfe std_composite shsnchildren snchild clsizefix $control            , abs(swjahr schoolxtrack) cl(classfix)
reghdfe std_composite shsnchildren snchild clsizefix $control $clcontrol , abs(swjahr schoolxtrack) cl(classfix)

*** TEACHER FE
reghdfe std_composite shsnchildren snchild clsizefix $control            , abs(swjahr schoolxtrack lehrerid) cl(classfix)
reghdfe std_composite shsnchildren snchild clsizefix $control $clcontrol , abs(swjahr schoolxtrack lehrerid) cl(classfix)

*** BY TRACK
areg std_composite shsnchildren snchild clsizefix $control $clcontrol if hightrack==0 , abs(schoolxyear) cl(classfix)
areg std_composite shsnchildren snchild clsizefix $control $clcontrol if hightrack==1 , abs(schoolxyear) cl(classfix)

***  FOOTNOTE ON DOMESTIC VIOLENCE
sum shfamprob
sum intsnchild if snchild==1
sum intsnchild if famprob==1
reghdfe std_composite shfamprob famprob cohortsize $control $slcontrol , abs(swjahr schoolxtrack) cl(cohort)

*** SN DECILE GRAPH
preserve
xtile qtsnchild = intsnchild if snchild==1 , nq(12)
qui tab qtsnchild, gen(sndecile_)
qui areg std_composite shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
gen b_0  = _b[shsnchildren]
gen cl_0 = _b[shsnchildren] - 1.96 * _se[shsnchildren]
gen cu_0 = _b[shsnchildren] + 1.96 * _se[shsnchildren]
gen qt_0 = 0
forvalues qt = 1(1)9 {
drop shsnchildren
replace snchild = 0 if sndecile_`qt' == 1
bysort classfix: egen total = total(snchild)
generate shsnchildren = ((total - snchild) / (clsizefix - 1))
drop total
qui areg std_composite shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
qui gen b_`qt'  = _b[shsnchildren]
qui gen cl_`qt' = _b[shsnchildren] - 1.96 * _se[shsnchildren]
qui gen cu_`qt' = _b[shsnchildren] + 1.96 * _se[shsnchildren]
qui gen qt_`qt' = `qt'
}
twoway ///
    (scatter b_0 qt_0 , msize(*0.7) mcolor(black)) (rspike cl_0 cu_0 qt_0 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_1 qt_1 , msize(*0.7) mcolor(black)) (rspike cl_1 cu_1 qt_1 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_2 qt_2 , msize(*0.7) mcolor(black)) (rspike cl_2 cu_2 qt_2 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_3 qt_3 , msize(*0.7) mcolor(black)) (rspike cl_3 cu_3 qt_3 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_4 qt_4 , msize(*0.7) mcolor(black)) (rspike cl_4 cu_4 qt_4 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_5 qt_5 , msize(*0.7) mcolor(black)) (rspike cl_5 cu_5 qt_5 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_6 qt_6 , msize(*0.7) mcolor(black)) (rspike cl_6 cu_6 qt_6 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_7 qt_7 , msize(*0.7) mcolor(black)) (rspike cl_7 cu_7 qt_7 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_8 qt_8 , msize(*0.7) mcolor(black)) (rspike cl_8 cu_8 qt_8 , lcolor(black) lwidth(*0.5)) ///
    (scatter b_9 qt_9 , msize(*0.7) mcolor(black)) (rspike cl_9 cu_9 qt_9 , lcolor(black) lwidth(*0.5)) ///
    in 1 , ///
    yline(0, lpattern(dot) lcolor(black)) ///
    xtitle("Percentile of special needs severity", size(*0.9)) ytitle("Effect size", size(*0.9)) ///
    graphregion(color(white)) bgcolor(white) ///
    xlabel(0(1)9, labsize(*0.8) nogrid) ylabel(-1.5(0.5)0, labsize(*0.8) nogrid) ///
    legend(order(1 "Estimate" 2 "95% Confidence interval") symxsize(5) region(color(white)))
graph export "$outputdir/figures/pct.pdf" , replace
graph export "$outputdir/figures/pct.eps" , as(eps) preview(off) replace
restore


******************** MID-TERM OUTCOMES (APPRENTICESHIP AND HIGH SCHOOL) ********************


preserve
keep if swjahr<2016
keep if postcompulsory!=.
sum postcompulsory vetvsaca vetquality

*** Panel (a): All children, Level: within school-track x year
foreach y of varlist postcompulsory vetvsaca vetquality {
	areg `y' shsnchildren snchild clsizefix $control $clcontrol , abs(schoolxtrackxyear) cl(classfix)
}

*** Panel (b): Non-SN children, Level: within school-track x year
foreach y of varlist postcompulsory vetvsaca vetquality {
	areg `y' shsnchildren clsizefix $control $clcontrol if snchild == 0 , abs(schoolxtrackxyear) cl(classfix)
}

*** Panel (c): SN children, Level: within school-track x year
foreach y of varlist postcompulsory vetvsaca vetquality {
	areg `y' shsnchildren clsizefix $control $clcontrol if snchild == 1 , abs(schoolxtrackxyear) cl(classfix)
}
restore



******************** LONG-TERM OUTCOMES (INCOME AND EMPOLOYMENT) ********************


*** Average/Last/Highest employment and earnings (always the same people, last is 2016)

preserve
keep if avg_employed!=.
keep if highest_employed!=.
keep if bfsdduree2016!=.
sum avg_monthly highest_monthly monthlyinc2016
egen qtincome1 = xtile(avg_monthly)     , nq(100)
egen qtincome2 = xtile(highest_monthly) , nq(100)
egen qtincome3 = xtile(monthlyinc2016)  , nq(100)
drop if qtincome1==1
drop if qtincome2==1
drop if qtincome3==1
drop if qtincome1==100
drop if qtincome2==100
drop if qtincome3==100
sum avg_employed highest_employed bfsdduree2016 ///
    avg_monthly  highest_monthly  monthlyinc2016
foreach y of varlist avg_monthly highest_monthly monthlyinc2016 {
	gen ln_`y' = ln(`y')
}
gen birthyear = year(bfsgeburtsdatum)
sum ln_monthlyinc2016
foreach y of varlist avg_employed   highest_employed   bfsdduree2016 ///
                     ln_avg_monthly ln_highest_monthly ln_monthlyinc2016 ///
                     {
areg `y' shsnchildren snchild clsizefix $control $clcontrol i.birthyear , abs(schoolxtrackxyear) cl(classfix)
}
restore


*** 2016 employment and earnings (sample restricted to those with VET)

preserve
sum bsabschluss
keep if bsabschluss==1
keep if bfsdduree2016!=.
sum bfsdduree2016 monthlyinc2016
egen qtincome = xtile(monthlyinc2016) , nq(100)
drop if qtincome==1
drop if qtincome==100
sum bfsdduree2016 monthlyinc2016
gen birthyear = year(bfsgeburtsdatum)
gen ln_minc16 = ln(monthlyinc2016)
areg bfsdduree2016  shsnchildren snchild clsizefix $control $clcontrol i.birthyear , abs(schoolxtrackxyear) cl(classfix)
areg ln_minc16      shsnchildren snchild clsizefix $control $clcontrol i.birthyear , abs(schoolxtrackxyear) cl(classfix)
restore

